from transformers import DistilBertTokenizerFast
from transformers import AutoModelForSequenceClassification
from bertviz import model_view
from bertviz import head_view
import numpy as np
import torch
# Shared tokenizer; both checkpoints use the uncased DistilBERT vocabulary.
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
# Checkpoint fine-tuned for IMDB sentiment (2 labels). output_attentions=True
# makes the forward pass also return the per-layer attention weights,
# which is what bertviz visualizes below.
model_trained = AutoModelForSequenceClassification.from_pretrained("textattack/distilbert-base-uncased-imdb",
num_labels=2,
output_attentions=True)
# Same architecture loaded from the base (not fine-tuned) checkpoint; its
# classification head is freshly initialized. Used as the comparison baseline.
model_untrained = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased",
num_labels=2,
output_attentions=True)
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight'] - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'pre_classifier.bias', 'pre_classifier.weight', 'classifier.weight'] You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
def show_model_view(model, tokenizer, text):
    """Render a bertviz model view of *model*'s attention over *text*.

    Args:
        model: sequence-classification model loaded with output_attentions=True,
            so its forward pass returns per-layer attention weights.
        tokenizer: tokenizer matching the model's vocabulary.
        text: raw input string to encode and visualize.
    """
    inputs = tokenizer.encode_plus(text, return_tensors='pt', add_special_tokens=True)
    input_ids = inputs['input_ids']
    # Inference only — no_grad avoids building the autograd graph and saves memory.
    with torch.no_grad():
        attention = model(input_ids)[-1]  # last output element is the attentions tuple
    input_id_list = input_ids[0].tolist()  # batch index 0 (single example)
    tokens = tokenizer.convert_ids_to_tokens(input_id_list)
    model_view(attention, tokens)
def show_model_view_diff(model_trained, model_untrained, tokenizer, text):
    """Visualize the per-layer attention difference (trained - untrained).

    bertviz only renders positive weights, so the resulting view highlights
    attention that became *stronger* through fine-tuning; attention that
    weakened is simply not shown.

    Args:
        model_trained: fine-tuned model loaded with output_attentions=True.
        model_untrained: baseline model loaded with output_attentions=True.
        tokenizer: tokenizer shared by both models.
        text: raw input string to encode and visualize.
    """
    inputs = tokenizer.encode_plus(text, return_tensors='pt', add_special_tokens=True)
    input_ids = inputs['input_ids']
    # Inference only — no_grad avoids building the autograd graph and saves memory.
    with torch.no_grad():
        attention_01 = model_trained(input_ids)[-1]
        attention_02 = model_untrained(input_ids)[-1]
    input_id_list = input_ids[0].tolist()  # batch index 0 (single example)
    tokens = tokenizer.convert_ids_to_tokens(input_id_list)
    # Element-wise difference per layer, kept as a tuple as bertviz expects.
    attention = tuple(a - b for a, b in zip(attention_01, attention_02))
    model_view(attention, tokens)
In this section we visualize each model's attention, as well as the difference between the attention of the fine-tuned and the untrained model, to see which attention patterns became stronger after training the model on the IMDB dataset.
First we define two sentences — one negative and one positive example — and check what each model predicts for them.
# Two probe sentences: one clearly negative, one positive-with-negation.
text_neg = "This movie was not really good, I did not enjoy it at all"
text_pos = "The movie was not bad at all, highly recommend"

print("Negative example text: ", text_neg)
# Encode the negative example and compare both models' raw logits.
inputs = tokenizer.encode_plus(text_neg, add_special_tokens=True, return_tensors='pt')
input_ids = inputs['input_ids']
logits_trained = model_trained(input_ids)['logits']
logits_untrained = model_untrained(input_ids)['logits']
print("Trained model predictions: ", logits_trained)
print("Untrained model predictions: ", logits_untrained)
Negative example text: This movie was not really good, I did not enjoy it at all Trained model predictions: tensor([[ 2.9077, -3.0004]], grad_fn=<AddmmBackward0>) Untrained model predictions: tensor([[-0.0332, 0.1948]], grad_fn=<AddmmBackward0>)
print("Positive example text: ", text_pos)
# Encode the positive example and compare both models' raw logits.
inputs = tokenizer.encode_plus(text_pos, add_special_tokens=True, return_tensors='pt')
input_ids = inputs['input_ids']
logits_trained = model_trained(input_ids)['logits']
logits_untrained = model_untrained(input_ids)['logits']
print("Trained model predictions: ", logits_trained)
print("Untrained model predictions: ", logits_untrained)
Positive example text: The movie was not bad at all, highly recommend Trained model predictions: tensor([[-2.4866, 2.9035]], grad_fn=<AddmmBackward0>) Untrained model predictions: tensor([[0.0016, 0.2703]], grad_fn=<AddmmBackward0>)
Now we visualize the attention of each model for each sentence to see what the attention patterns look like.
# Render the attention view for each model on each sentence,
# in the same order as before: untrained first, then trained.
for current_model in (model_untrained, model_trained):
    for sample in (text_pos, text_neg):
        show_model_view(current_model, tokenizer, text=sample)
Now we look at the differences between the two attention matrices and visualize them for each sentence. Note that only positive differences are shown: attention that is weaker than in the untrained model is not visualized — only attention that became stronger through training.
# Render the trained-minus-untrained attention difference for both sentences.
for sample in (text_pos, text_neg):
    show_model_view_diff(model_trained, model_untrained, tokenizer, text=sample)